<!DOCTYPE html>
<html lang="en-us">
  <head>

    <meta http-equiv="content-type" content="text/html; charset=utf-8">
    
<meta charset="UTF-8">
<title>Edge n-gram tokenizer | Elasticsearch Guide [7.7] | Elastic</title>
<link rel="home" href="index.html" title="Elasticsearch Guide [7.7]">
<link rel="up" href="analysis-tokenizers.html" title="Tokenizer reference">
<link rel="prev" href="analysis-classic-tokenizer.html" title="Classic Tokenizer">
<link rel="next" href="analysis-keyword-tokenizer.html" title="Keyword Tokenizer">
<meta name="DC.type" content="Learn/Docs/Elasticsearch/Reference/7.7">
<meta name="DC.subject" content="Elasticsearch">
<meta name="DC.identifier" content="7.7">
<meta name="robots" content="noindex,nofollow">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <script src="https://cdn.optimizely.com/js/18132920325.js"></script>
    <link rel="apple-touch-icon" sizes="57x57" href="/apple-icon-57x57.png">
    <link rel="apple-touch-icon" sizes="60x60" href="/apple-icon-60x60.png">
    <link rel="apple-touch-icon" sizes="72x72" href="/apple-icon-72x72.png">
    <link rel="apple-touch-icon" sizes="76x76" href="/apple-icon-76x76.png">
    <link rel="apple-touch-icon" sizes="114x114" href="/apple-icon-114x114.png">
    <link rel="apple-touch-icon" sizes="120x120" href="/apple-icon-120x120.png">
    <link rel="apple-touch-icon" sizes="144x144" href="/apple-icon-144x144.png">
    <link rel="apple-touch-icon" sizes="152x152" href="/apple-icon-152x152.png">
    <link rel="apple-touch-icon" sizes="180x180" href="/apple-icon-180x180.png">
    <link rel="icon" type="image/png" href="/favicon-32x32.png" sizes="32x32">
    <link rel="icon" type="image/png" href="/android-chrome-192x192.png" sizes="192x192">
    <link rel="icon" type="image/png" href="/favicon-96x96.png" sizes="96x96">
    <link rel="icon" type="image/png" href="/favicon-16x16.png" sizes="16x16">
    <link rel="manifest" href="/manifest.json">
    <meta name="apple-mobile-web-app-title" content="Elastic">
    <meta name="application-name" content="Elastic">
    <meta name="msapplication-TileColor" content="#ffffff">
    <meta name="msapplication-TileImage" content="/mstile-144x144.png">
    <meta name="theme-color" content="#ffffff">
    <meta name="naver-site-verification" content="936882c1853b701b3cef3721758d80535413dbfd">
    <meta name="yandex-verification" content="d8a47e95d0972434">
    <meta name="localized" content="true">
    <meta name="st:robots" content="follow,index">
    <meta property="og:image" content="https://www.elastic.co/static/images/elastic-logo-200.png">
    <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
    <link rel="icon" href="/favicon.ico" type="image/x-icon">
    <link rel="apple-touch-icon-precomposed" sizes="64x64" href="/favicon_64x64_16bit.png">
    <link rel="apple-touch-icon-precomposed" sizes="32x32" href="/favicon_32x32.png">
    <link rel="apple-touch-icon-precomposed" sizes="16x16" href="/favicon_16x16.png">
    <!-- Give IE8 a fighting chance -->
    <!--[if lt IE 9]>
    <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
    <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
    <![endif]-->
    <link rel="stylesheet" type="text/css" href="/guide/static/styles.css">
  </head>

  <!--© 2015-2021 Elasticsearch B.V. Copying, publishing and/or distributing without written permission is strictly prohibited.-->

  <body>
    <!-- Google Tag Manager -->
    <script>dataLayer = [];</script><noscript><iframe src="//www.googletagmanager.com/ns.html?id=GTM-58RLH5" height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
    <script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start': new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0], j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src= '//www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f); })(window,document,'script','dataLayer','GTM-58RLH5');</script>
    <!-- End Google Tag Manager -->

    <!-- Global site tag (gtag.js) - Google Analytics -->
    <script async src="https://www.googletagmanager.com/gtag/js?id=UA-12395217-16"></script>
    <script>
      window.dataLayer = window.dataLayer || [];
      function gtag(){dataLayer.push(arguments);}
      gtag('js', new Date());
      gtag('config', 'UA-12395217-16');
    </script>

    <!--BEGIN QUALTRICS WEBSITE FEEDBACK SNIPPET-->
    <script type="text/javascript">
      (function(){var g=function(e,h,f,g){
      this.get=function(a){for(var a=a+"=",c=document.cookie.split(";"),b=0,e=c.length;b<e;b++){for(var d=c[b];" "==d.charAt(0);)d=d.substring(1,d.length);if(0==d.indexOf(a))return d.substring(a.length,d.length)}return null};
      this.set=function(a,c){var b="",b=new Date;b.setTime(b.getTime()+6048E5);b="; expires="+b.toGMTString();document.cookie=a+"="+c+b+"; path=/; "};
      this.check=function(){var a=this.get(f);if(a)a=a.split(":");else if(100!=e)"v"==h&&(e=Math.random()>=e/100?0:100),a=[h,e,0],this.set(f,a.join(":"));else return!0;var c=a[1];if(100==c)return!0;switch(a[0]){case "v":return!1;case "r":return c=a[2]%Math.floor(100/c),a[2]++,this.set(f,a.join(":")),!c}return!0};
      this.go=function(){if(this.check()){var a=document.createElement("script");a.type="text/javascript";a.src=g;document.body&&document.body.appendChild(a)}};
      this.start=function(){var a=this;window.addEventListener?window.addEventListener("load",function(){a.go()},!1):window.attachEvent&&window.attachEvent("onload",function(){a.go()})}};
      try{(new g(100,"r","QSI_S_ZN_emkP0oSe9Qrn7kF","https://znemkp0ose9qrn7kf-elastic.siteintercept.qualtrics.com/WRSiteInterceptEngine/?Q_ZID=ZN_emkP0oSe9Qrn7kF")).start()}catch(i){}})();
    </script><div id="ZN_emkP0oSe9Qrn7kF"><!--DO NOT REMOVE-CONTENTS PLACED HERE--></div>
    <!--END WEBSITE FEEDBACK SNIPPET-->

    <div id="elastic-nav" style="display:none;"></div>
    <script src="https://www.elastic.co/elastic-nav.js"></script>

    <!-- Subnav -->
    <div>
      <div>
        <div class="tertiary-nav d-none d-md-block">
          <div class="container">
            <div class="p-t-b-15 d-flex justify-content-between nav-container">
              <div class="breadcrum-wrapper"><span><a href="/guide/" style="font-size: 14px; font-weight: 600; color: #000;">Docs</a></span></div>
            </div>
          </div>
        </div>
      </div>
    </div>

    <div class="main-container">
      <section id="content">
        <div class="content-wrapper">

          <section id="guide" lang="en">
            <div class="container">
              <div class="row">
                <div class="col-xs-12 col-sm-8 col-md-8 guide-section">
                  <!-- start body -->
                  <div class="page_header">
<strong>IMPORTANT</strong>: No additional bug fixes or documentation updates
will be released for this version. For the latest information, see the
<a href="../current/index.html">current release documentation</a>.
</div>
<div id="content">
<div class="breadcrumbs">
<span class="breadcrumb-link"><a href="index.html">Elasticsearch Guide [7.7]</a></span>
»
<span class="breadcrumb-link"><a href="analysis.html">Text analysis</a></span>
»
<span class="breadcrumb-link"><a href="analysis-tokenizers.html">Tokenizer reference</a></span>
»
<span class="breadcrumb-node">Edge n-gram tokenizer</span>
</div>
<div class="navheader">
<span class="prev">
<a href="analysis-classic-tokenizer.html">« Classic Tokenizer</a>
</span>
<span class="next">
<a href="analysis-keyword-tokenizer.html">Keyword Tokenizer »</a>
</span>
</div>
<div class="section">
<div class="titlepage"><div><div>
<h2 class="title">
<a id="analysis-edgengram-tokenizer"></a>Edge n-gram tokenizer<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch/edit/7.7/docs/reference/analysis/tokenizers/edgengram-tokenizer.asciidoc">edit</a>
</h2>
</div></div></div>
<p>The <code class="literal">edge_ngram</code> tokenizer first breaks text down into words whenever it
encounters one of a list of specified characters, then it emits
<a href="https://en.wikipedia.org/wiki/N-gram" class="ulink" target="_top">N-grams</a> of each word where the start of
the N-gram is anchored to the beginning of the word.</p>
<p>Edge N-Grams are useful for <em>search-as-you-type</em> queries.</p>
<div class="tip admon">
<div class="icon"></div>
<div class="admon_content">
<p>When you need <em>search-as-you-type</em> for text which has a widely known
order, such as movie or song titles, the
<a class="xref" href="search-suggesters.html#completion-suggester" title="Completion Suggester">completion suggester</a> is a much more efficient
choice than edge N-grams.  Edge N-grams have the advantage when trying to
autocomplete words that can appear in any order.</p>
</div>
</div>
<h3>
<a id="_example_output_10"></a>Example output<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch/edit/7.7/docs/reference/analysis/tokenizers/edgengram-tokenizer.asciidoc">edit</a>
</h3>
<p>With the default settings, the <code class="literal">edge_ngram</code> tokenizer treats the initial text as a
single token and produces N-grams with minimum length <code class="literal">1</code> and maximum length
<code class="literal">2</code>:</p>
<div class="pre_wrapper lang-console">
<pre class="programlisting prettyprint lang-console">POST _analyze
{
  "tokenizer": "edge_ngram",
  "text": "Quick Fox"
}</pre>
</div>
<div class="console_widget" data-snippet="snippets/864.console"></div>
<p>The above sentence would produce the following terms:</p>
<div class="pre_wrapper lang-text">
<pre class="programlisting prettyprint lang-text">[ Q, Qu ]</pre>
</div>
<div class="note admon">
<div class="icon"></div>
<div class="admon_content">
<p>These default gram lengths are almost entirely useless.  You need to
configure the <code class="literal">edge_ngram</code> before using it.</p>
</div>
</div>
<h3>
<a id="_configuration_11"></a>Configuration<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch/edit/7.7/docs/reference/analysis/tokenizers/edgengram-tokenizer.asciidoc">edit</a>
</h3>
<p>The <code class="literal">edge_ngram</code> tokenizer accepts the following parameters:</p>
<div class="variablelist">
<dl class="variablelist">
<dt>
<span class="term">
<code class="literal">min_gram</code>
</span>
</dt>
<dd>
Minimum length of characters in a gram.  Defaults to <code class="literal">1</code>.
</dd>
<dt>
<span class="term">
<code class="literal">max_gram</code>
</span>
</dt>
<dd>
<p>Maximum length of characters in a gram.  Defaults to <code class="literal">2</code>.</p>
<p>See <a class="xref" href="analysis-edgengram-tokenizer.html#max-gram-limits" title="Limitations of the max_gram parameter">Limitations of the <code class="literal">max_gram</code> parameter</a>.</p>
</dd>
<dt>
<span class="term">
<code class="literal">token_chars</code>
</span>
</dt>
<dd>
<p>
Character classes that should be included in a token.  Elasticsearch
will split on characters that don’t belong to the classes specified.
Defaults to <code class="literal">[]</code> (keep all characters).
</p>
<p>Character classes may be any of the following:</p>
<div class="ulist itemizedlist">
<ul class="itemizedlist">
<li class="listitem">
<code class="literal">letter</code> —      for example <code class="literal">a</code>, <code class="literal">b</code>, <code class="literal">ï</code> or <code class="literal">京</code>
</li>
<li class="listitem">
<code class="literal">digit</code> —       for example <code class="literal">3</code> or <code class="literal">7</code>
</li>
<li class="listitem">
<code class="literal">whitespace</code> —  for example <code class="literal">" "</code> or <code class="literal">"\n"</code>
</li>
<li class="listitem">
<code class="literal">punctuation</code> — for example <code class="literal">!</code> or <code class="literal">"</code>
</li>
<li class="listitem">
<code class="literal">symbol</code> —      for example <code class="literal">$</code> or <code class="literal">√</code>
</li>
<li class="listitem">
<code class="literal">custom</code> —      custom characters which need to be set using the
<code class="literal">custom_token_chars</code> setting.
</li>
</ul>
</div>
</dd>
<dt>
<span class="term">
<code class="literal">custom_token_chars</code>
</span>
</dt>
<dd>
Custom characters that should be treated as part of a token. For example,
setting this to <code class="literal">+-_</code> will make the tokenizer treat the plus, minus and
underscore sign  as part of a token.
</dd>
</dl>
</div>
<h3>
<a id="max-gram-limits"></a>Limitations of the <code class="literal">max_gram</code> parameter<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch/edit/7.7/docs/reference/analysis/tokenizers/edgengram-tokenizer.asciidoc">edit</a>
</h3>
<p>The <code class="literal">edge_ngram</code> tokenizer’s <code class="literal">max_gram</code> value limits the character length of
tokens. When the <code class="literal">edge_ngram</code> tokenizer is used with an index analyzer, this
means search terms longer than the <code class="literal">max_gram</code> length may not match any indexed
terms.</p>
<p>For example, if the <code class="literal">max_gram</code> is <code class="literal">3</code>, searches for <code class="literal">apple</code> won’t match the
indexed term <code class="literal">app</code>.</p>
<p>To account for this, you can use the
<a class="xref" href="analysis-truncate-tokenfilter.html" title="Truncate token filter"><code class="literal">truncate</code></a> token filter with a search analyzer
to shorten search terms to the <code class="literal">max_gram</code> character length. However, this could
return irrelevant results.</p>
<p>For example, if the <code class="literal">max_gram</code> is <code class="literal">3</code> and search terms are truncated to three
characters, the search term <code class="literal">apple</code> is shortened to <code class="literal">app</code>. This means searches
for <code class="literal">apple</code> return any indexed terms matching <code class="literal">app</code>, such as <code class="literal">apply</code>, <code class="literal">snapped</code>,
and <code class="literal">apple</code>.</p>
<p>We recommend testing both approaches to see which best fits your
use case and desired search experience.</p>
<h3>
<a id="_example_configuration_7"></a>Example configuration<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch/edit/7.7/docs/reference/analysis/tokenizers/edgengram-tokenizer.asciidoc">edit</a>
</h3>
<p>In this example, we configure the <code class="literal">edge_ngram</code> tokenizer to treat letters and
digits as tokens, and to produce grams with minimum length <code class="literal">2</code> and maximum
length <code class="literal">10</code>:</p>
<div class="pre_wrapper lang-console">
<pre class="programlisting prettyprint lang-console">PUT my_index
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_analyzer": {
          "tokenizer": "my_tokenizer"
        }
      },
      "tokenizer": {
        "my_tokenizer": {
          "type": "edge_ngram",
          "min_gram": 2,
          "max_gram": 10,
          "token_chars": [
            "letter",
            "digit"
          ]
        }
      }
    }
  }
}

POST my_index/_analyze
{
  "analyzer": "my_analyzer",
  "text": "2 Quick Foxes."
}</pre>
</div>
<div class="console_widget" data-snippet="snippets/865.console"></div>
<p>The above example produces the following terms:</p>
<div class="pre_wrapper lang-text">
<pre class="programlisting prettyprint lang-text">[ Qu, Qui, Quic, Quick, Fo, Fox, Foxe, Foxes ]</pre>
</div>
<p>Usually we recommend using the same <code class="literal">analyzer</code> at index time and at search
time. In the case of the <code class="literal">edge_ngram</code> tokenizer, the advice is different. It
only makes sense to use the <code class="literal">edge_ngram</code> tokenizer at index time, to ensure
that partial words are available for matching in the index. At search time,
just search for the terms the user has typed in, for instance: <code class="literal">Quick Fo</code>.</p>
<p>Below is an example of how to set up a field for <em>search-as-you-type</em>.</p>
<p>Note that the <code class="literal">max_gram</code> value for the index analyzer is <code class="literal">10</code>, which limits
indexed terms to 10 characters. Search terms are not truncated, meaning that
search terms longer than 10 characters may not match any indexed terms.</p>
<div class="pre_wrapper lang-console">
<pre class="programlisting prettyprint lang-console">PUT my_index
{
  "settings": {
    "analysis": {
      "analyzer": {
        "autocomplete": {
          "tokenizer": "autocomplete",
          "filter": [
            "lowercase"
          ]
        },
        "autocomplete_search": {
          "tokenizer": "lowercase"
        }
      },
      "tokenizer": {
        "autocomplete": {
          "type": "edge_ngram",
          "min_gram": 2,
          "max_gram": 10,
          "token_chars": [
            "letter"
          ]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "title": {
        "type": "text",
        "analyzer": "autocomplete",
        "search_analyzer": "autocomplete_search"
      }
    }
  }
}

PUT my_index/_doc/1
{
  "title": "Quick Foxes" <a id="CO415-1"></a><i class="conum" data-value="1"></i>
}

POST my_index/_refresh

GET my_index/_search
{
  "query": {
    "match": {
      "title": {
        "query": "Quick Fo", <a id="CO415-2"></a><i class="conum" data-value="2"></i>
        "operator": "and"
      }
    }
  }
}</pre>
</div>
<div class="console_widget" data-snippet="snippets/866.console"></div>
<div class="calloutlist">
<table border="0" summary="Callout list">
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO415-1"><i class="conum" data-value="1"></i></a></p>
</td>
<td align="left" valign="top">
<p>The <code class="literal">autocomplete</code> analyzer indexes the terms <code class="literal">[qu, qui, quic, quick, fo, fox, foxe, foxes]</code>.</p>
</td>
</tr>
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO415-2"><i class="conum" data-value="2"></i></a></p>
</td>
<td align="left" valign="top">
<p>The <code class="literal">autocomplete_search</code> analyzer searches for the terms <code class="literal">[quick, fo]</code>, both of which appear in the index.</p>
</td>
</tr>
</table>
</div>
</div>
<div class="navfooter">
<span class="prev">
<a href="analysis-classic-tokenizer.html">« Classic Tokenizer</a>
</span>
<span class="next">
<a href="analysis-keyword-tokenizer.html">Keyword Tokenizer »</a>
</span>
</div>
</div>

                  <!-- end body -->
                </div>
                <div class="col-xs-12 col-sm-4 col-md-4" id="right_col">
                  <div id="rtpcontainer" style="display: block;">
                    <div class="mktg-promo">
                      <h3>Most Popular</h3>
                      <ul class="icons">
                        <li class="icon-elasticsearch-white"><a href="https://www.elastic.co/webinars/getting-started-elasticsearch?baymax=default&amp;elektra=docs&amp;storm=top-video">Get Started with Elasticsearch: Video</a></li>
                        <li class="icon-kibana-white"><a href="https://www.elastic.co/webinars/getting-started-kibana?baymax=default&amp;elektra=docs&amp;storm=top-video">Intro to Kibana: Video</a></li>
                        <li class="icon-logstash-white"><a href="https://www.elastic.co/webinars/introduction-elk-stack?baymax=default&amp;elektra=docs&amp;storm=top-video">ELK for Logs &amp; Metrics: Video</a></li>
                      </ul>
                    </div>
                  </div>
                </div>
              </div>
            </div>
          </section>

        </div>


<div id="elastic-footer"></div>
<script src="https://www.elastic.co/elastic-footer.js"></script>
<!-- Footer Section end-->

      </section>
    </div>

<script src="/guide/static/jquery.js"></script>
<script type="text/javascript" src="/guide/static/docs.js"></script>
<script type="text/javascript">
  window.initial_state = {}</script>
  </body>
</html>
